In [6]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
In [7]:
## Load the ROI hierarchy lookup table (tab-separated text hosted on GitHub).
url = "https://raw.githubusercontent.com/bcaffo/MRIcloudT1volumetrics/master/inst/extdata/multilevel_lookup_table.txt"

# Raw header -> name used in the rest of the notebook.
# NOTE(review): the "modify.N" headers look like pandas' de-duplication of a
# repeated "modify" column name in the source file -- confirm against the file.
hierarchy_names = {
    "modify"   : "roi",
    "modify.1" : "level4",
    "modify.2" : "level3",
    "modify.3" : "level2",
    "modify.4" : "level1",
}
# Column order kept for downstream cells: the ROI first, then level4 through level1.
hierarchy_order = ['roi', 'level4', 'level3', 'level2', 'level1']

multilevel_lookup = (
    pd.read_csv(url, sep = "\t")
    .drop(columns = ['Level5'])        # 'Level5' is discarded before renaming
    .rename(columns = hierarchy_names)
    .loc[:, hierarchy_order]
)
multilevel_lookup.head()
Out[7]:
roi level4 level3 level2 level1
0 SFG_L SFG_L Frontal_L CerebralCortex_L Telencephalon_L
1 SFG_R SFG_R Frontal_R CerebralCortex_R Telencephalon_R
2 SFG_PFC_L SFG_L Frontal_L CerebralCortex_L Telencephalon_L
3 SFG_PFC_R SFG_R Frontal_R CerebralCortex_R Telencephalon_R
4 SFG_pole_L SFG_L Frontal_L CerebralCortex_L Telencephalon_L
In [8]:
## Load the volumetric data and keep the rows for a single subject
# Named `subject_id` rather than `id` so the builtin id() is not shadowed
# for the rest of the kernel session.
subject_id = 127
subject_url = "https://raw.githubusercontent.com/smart-stats/ds4bio_book/main/book/assetts/kirby21AllLevels.csv"
all_subjects = pd.read_csv(subject_url)

# Row filter: type 1, level 5, and the chosen subject.
# NOTE(review): the meaning of the `type` and `level` codes is not visible
# here -- confirm against the data dictionary.
# Bracket access is used because `type` and `id` are also Python builtins and
# attribute-style column access is easy to misread or break.
keep_rows = (
    (all_subjects["type"] == 1)
    & (all_subjects["level"] == 5)
    & (all_subjects["id"] == subject_id)
)

## Merge the subject data with the multilevel data
subjectData = (
    all_subjects.loc[keep_rows, ["roi", "volume"]]
    # Default (inner) merge: only ROIs present in both tables are kept.
    .merge(multilevel_lookup, on = "roi")
    .assign(
        # Constant label placed on every row; acts as a single top-level group.
        icv = "ICV",
        # Each ROI's share of the total volume over the merged rows.
        comp = lambda merged: merged["volume"] / merged["volume"].sum(),
    )
)
subjectData.head()
Out[8]:
roi volume level4 level3 level2 level1 icv comp
0 SFG_L 12926 SFG_L Frontal_L CerebralCortex_L Telencephalon_L ICV 0.009350
1 SFG_R 10050 SFG_R Frontal_R CerebralCortex_R Telencephalon_R ICV 0.007270
2 SFG_PFC_L 12783 SFG_L Frontal_L CerebralCortex_L Telencephalon_L ICV 0.009247
3 SFG_PFC_R 11507 SFG_R Frontal_R CerebralCortex_R Telencephalon_R ICV 0.008324
4 SFG_pole_L 3078 SFG_L Frontal_L CerebralCortex_L Telencephalon_L ICV 0.002227
In [9]:
import pandas as pd
import numpy as np

# Hierarchy columns ordered from the root ('icv') down to the finest level used here.
categories = ['icv', 'level1', 'level2', 'level3', 'level4']

# One (source, target, volume) frame per pair of adjacent levels. The frames are
# collected in a list and concatenated once, instead of growing a DataFrame
# inside the loop (quadratic, and concatenating onto an empty frame triggers a
# FutureWarning in recent pandas).
link_frames = [
    subjectData[[parent, child, 'volume']].rename(
        columns = {parent: 'source', child: 'target'}
    )
    for parent, child in zip(categories[:-1], categories[1:])
]

# Total volume flowing along each distinct source -> target edge.
newDf = (
    pd.concat(link_frames, ignore_index = True)
    .groupby(['source', 'target'], as_index = False)['volume']
    .sum()
)

# Sorted unique labels across all hierarchy columns; a node is identified by its
# position in this list. A label that occurs at more than one level therefore
# maps to a single node.
label_list = list(np.unique(subjectData[categories].values))

# Dict lookup replaces repeated list.index() scans (O(1) instead of O(n) per row).
label_position = {label: position for position, label in enumerate(label_list)}
source = newDf['source'].map(label_position)
target = newDf['target'].map(label_position)
count = newDf['volume']
In [10]:
import plotly.graph_objects as go
# Sankey trace: one node per entry of label_list; each link runs from
# source[i] to target[i] (positions into label_list) with weight count[i].
sankey_trace = go.Sankey(
    node = {
        "label": label_list,
        "pad": 20,
        "thickness": 20,
    },
    link = {
        "source": source,
        "target": target,
        "value": count,
    },
)

fig = go.Figure(data = [sankey_trace])

# Fixed 800 x 2500 canvas on a light steel blue background.
fig.update_layout(
    autosize = True,
    width = 800,
    height = 2500,
    paper_bgcolor = "LightSteelBlue",
)

fig.show()
In [ ]: